In [1]:
import pandas as pd
import numpy as np
import random

import matplotlib.pyplot as plt
import seaborn as sns

import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

# Palette of hex colours that the bar-chart cells sample from with
# random.choice(). Order and contents are significant: '#C2b280' is listed
# twice, which makes it twice as likely to be drawn as any other entry.
colorarr = [
    '#0592D0', '#Cd7f32', '#E97451', '#Bdb76b',
    '#954535', '#C2b280', '#808000', '#C2b280',
    '#E4d008', '#9acd32', '#Eedc82', '#E4d96f',
    '#32cd32', '#39ff14', '#00ff7f', '#008080',
    '#36454f', '#F88379', '#Ff4500', '#Ffb347',
    '#A94064', '#E75480', '#Ffb6c1', '#E5e4e2',
    '#Faf0e6', '#8c92ac', '#Dbd7d2', '#A7a6ba',
    '#B38b6d',
]
In [2]:
# Location of the training table, relative to the notebook's working directory.
DATA_PATH = "Research/datatrain2.csv"

# Load the table and preview its first rows.
cropdf = pd.read_csv(DATA_PATH)
cropdf.head()
Out[2]:
N P K Temperature Humidity PH Breed
0 121.6 28.6 58 29.82046 57.48837 5.99954 AT 356
1 133.7 28.8 58 26.92841 54.05204 5.98766 AT 356
2 134.6 39.9 56 25.78214 65.78586 5.96768 AT 356
3 122.3 38.4 54 29.64071 60.17039 6.13613 AT 356
4 137.5 27.7 55 28.16842 67.62975 5.54322 AT 356
In [3]:
# Dimensions of the loaded table as (rows, columns).
cropdf.shape
Out[3]:
(2118, 7)
In [4]:
# Column labels of the loaded table.
cropdf.columns
Out[4]:
Index(['N', 'P', 'K', 'Temperature', 'Humidity', 'PH', 'Breed'], dtype='object')
In [5]:
# Per-column flag: True when the column contains at least one missing value.
cropdf.isnull().any()
Out[5]:
N              False
P              False
K              False
Temperature    False
Humidity       False
PH             False
Breed          False
dtype: bool
In [6]:
# Distinct values of the target column, in order of first appearance.
breed_names = cropdf['Breed'].unique()

print("Number of various crops: ", len(breed_names))
print("List of crops: ", breed_names)
Number of various crops:  5
List of crops:  ['AT 356' 'BG 250' 'BG 352' 'Mottaikaruppan' 'suwandel']
In [7]:
# Number of rows per breed (class balance of the target column).
cropdf['Breed'].value_counts()
Out[7]:
BG 250            528
suwandel          464
BG 352            396
Mottaikaruppan    390
AT 356            340
Name: Breed, dtype: int64
In [8]:
# One row per breed holding the mean of every feature column; the later
# plotting cells all read from this table.
crop_summary = cropdf.pivot_table(index=['Breed'], aggfunc='mean')
crop_summary.head()
Out[8]:
Humidity K N P PH Temperature
Breed
AT 356 68.582285 71.205882 136.739412 32.991765 6.013526 27.586198
BG 250 67.539950 69.685606 130.518182 33.192424 6.065707 27.965815
BG 352 70.298554 68.323232 126.328283 33.571717 6.022401 27.184925
Mottaikaruppan 67.730658 63.800000 132.569744 30.894359 6.098946 27.823929
suwandel 69.459796 68.517241 129.702586 33.967241 6.007524 27.902968
In [9]:
# Nitrogen: mean N per breed, drawn as two side-by-side horizontal bar panels.
crop_summary_N = crop_summary.sort_values(by='N', ascending=False)

# Exactly two traces are placed below (col=1 and col=2), so request two
# columns; asking for three reserved an unused third subplot slot and was
# inconsistent with the phosphorus and potassium figures.
fig = make_subplots(rows=1, cols=2)

# First ten rows of the descending sort, re-sorted ascending for plotting.
top = {
    'y' : crop_summary_N['N'][0:10].sort_values().index,
    'x' : crop_summary_N['N'][0:10].sort_values()
}

# Last ten rows of the descending sort.
# NOTE(review): crop_summary has one row per breed; with ten or fewer breeds
# (five in the recorded run) both slices cover every breed, so the two panels
# show the same values in opposite order.
last = {
    'y' : crop_summary_N['N'][-10:].index,
    'x' : crop_summary_N['N'][-10:]
}

# Bar colours are drawn at random from the shared palette on every run.
fig.add_trace(
    go.Bar(top,
           name="Most nitrogen required",
           marker_color=random.choice(colorarr),
           orientation='h',
           text=top['x']),
    row=1, col=1
)

fig.add_trace(
    go.Bar(last,
           name="Least nitrogen required",
           marker_color=random.choice(colorarr),
           orientation='h',
           text=last['x']),
    row=1, col=2
)

# Print each bar's value inside the bar.
fig.update_traces(texttemplate='%{text}', textposition='inside')
fig.update_layout(title_text="Nitrogen (N)",
                  plot_bgcolor='white',
                  font_size=12,
                  font_color='black',
                  height=500)

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()
In [10]:
# Phosphorus: mean P per breed, drawn as two horizontal bar panels.
crop_summary_P = crop_summary.sort_values(by='P', ascending=False)
p_desc = crop_summary_P['P']

# Left panel: first ten entries of the descending order, re-sorted ascending.
p_head = p_desc.iloc[:10].sort_values()
top = {'y': p_head.index, 'x': p_head}

# Right panel: last ten entries, left in descending order.
p_tail = p_desc.iloc[-10:]
last = {'y': p_tail.index, 'x': p_tail}

fig = make_subplots(rows=1, cols=2)

panels = [
    (top, "Most phosphorus required"),
    (last, "Least phosphorus required"),
]
for col_no, (bars, legend_name) in enumerate(panels, start=1):
    # One palette colour is drawn at random per panel, left panel first.
    fig.add_trace(
        go.Bar(bars,
               name=legend_name,
               marker_color=random.choice(colorarr),
               orientation='h',
               text=bars['x']),
        row=1, col=col_no
    )

# Print each bar's value inside the bar.
fig.update_traces(texttemplate='%{text}', textposition='inside')
fig.update_layout(
    title_text="Phosphorus (P)",
    plot_bgcolor='white',
    font_size=12,
    font_color='black',
    height=500,
)

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()
In [11]:
# Potassium: mean K per breed, drawn as two horizontal bar panels.
crop_summary_K = crop_summary.sort_values(by='K', ascending=False)
k_desc = crop_summary_K['K']

# Left panel: first ten entries of the descending order, re-sorted ascending.
k_head = k_desc.iloc[:10].sort_values()
top = {'y': k_head.index, 'x': k_head}

# Right panel: last ten entries, left in descending order.
k_tail = k_desc.iloc[-10:]
last = {'y': k_tail.index, 'x': k_tail}

fig = make_subplots(rows=1, cols=2)

panels = [
    (top, "Most potassium required"),
    (last, "Least potassium required"),
]
for col_no, (bars, legend_name) in enumerate(panels, start=1):
    # One palette colour is drawn at random per panel, left panel first.
    fig.add_trace(
        go.Bar(bars,
               name=legend_name,
               marker_color=random.choice(colorarr),
               orientation='h',
               text=bars['x']),
        row=1, col=col_no
    )

# Print each bar's value inside the bar.
fig.update_traces(texttemplate='%{text}', textposition='inside')
fig.update_layout(
    title_text="Potassium (K)",
    plot_bgcolor='white',
    font_size=12,
    font_color='black',
    height=500,
)

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()
In [12]:
# Grouped bar chart: mean N, P and K side by side for every breed.
fig = go.Figure()

# (summary column, legend label, bar colour) for each nutrient, in draw order.
nutrient_traces = [
    ('N', 'Nitrogen', 'indianred'),
    ('P', 'Phosphorous', 'lightsalmon'),
    ('K', 'Potash', 'crimson'),
]
for column, legend_name, bar_colour in nutrient_traces:
    fig.add_trace(go.Bar(
        x=crop_summary.index,
        y=crop_summary[column],
        name=legend_name,
        marker_color=bar_colour
    ))

fig.update_layout(
    title="N, P, K values comparision between crops",
    plot_bgcolor='white',
    barmode='group',
    xaxis_tickangle=-45,
)

fig.show()
In [13]:
# NPK composition per breed: one donut chart per breed, side by side.
labels = ['Nitrogen(N)','Phosphorous(P)','Potash(K)']
breeds = ['AT 356', 'BG 250', 'BG 352', 'Mottaikaruppan', 'suwandel']
# x position of each breed's caption, in the same order as `breeds`
# (hand-tuned values carried over unchanged).
annotation_x = [0.06, 0.26, 0.50, 0.74, 0.94]

fig = make_subplots(rows=1, cols=len(breeds),
                    specs=[[{'type': 'domain'} for _ in breeds]])

for col, breed in enumerate(breeds, start=1):
    # Select the breed's row by its index label. The previous form,
    # series['N'][0], used an integer key on a label-indexed Series, which
    # only works through a deprecated positional fallback.
    values = crop_summary.loc[breed, ['N', 'P', 'K']].tolist()
    fig.add_trace(go.Pie(labels=labels, values=values, name=breed), 1, col)

# hole=.4 turns each pie into a donut.
fig.update_traces(hole=.4, hoverinfo="label+percent+name")
fig.update_layout(
    title_text="NPK ratio for AT 356, BG 250, BG 352, Mottaikaruppan, suwandel",
    annotations=[dict(text=breed, x=x_pos, y=0.8, font_size=15, showarrow=False)
                 for breed, x_pos in zip(breeds, annotation_x)])
fig.show()
In [14]:
# Rows belonging to the breeds shown in the scatter plot.
scatter_breeds = ['AT 356', 'BG 352', 'BG 250', 'Mottaikaruppan', 'suwandel']
crop_scatter = cropdf[cropdf['Breed'].isin(scatter_breeds)]

# Temperature against humidity, with colour and marker symbol both keyed on breed.
fig = px.scatter(
    crop_scatter,
    x="Temperature",
    y="Humidity",
    color="Breed",
    symbol="Breed",
)
fig.update_layout(plot_bgcolor='white')
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)

fig.show()
In [15]:
# Bar chart of the mean pH, humidity and temperature of each breed.
environment_columns = ["PH", "Humidity", "Temperature"]

fig = px.bar(crop_summary, x=crop_summary.index, y=environment_columns)
fig.update_layout(
    title_text="Comparision between ph, temperature and humidity",
    plot_bgcolor='white',
    height=500,
)

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()
In [16]:
# Pairwise correlation between the numeric feature columns.
fig, ax = plt.subplots(1, 1, figsize=(15, 9))

# 'Breed' holds strings; DataFrame.corr() raises on non-numeric columns in
# pandas >= 2.0 (older versions silently dropped them), so restrict the frame
# to numeric columns explicitly.
numeric_features = cropdf.select_dtypes(include='number')

# Draw on the axes created above instead of relying on the implicit current axes.
sns.heatmap(numeric_features.corr(), annot=True, cmap='Wistia', ax=ax)
ax.set(xlabel='features', ylabel='features')
ax.set_title('Correlation between different features', fontsize=15, color='black')
plt.show()
In [17]:
# Target: the breed label. Features: every remaining column.
y = cropdf['Breed']
X = cropdf.drop(columns=['Breed'])
In [18]:
from sklearn.model_selection import train_test_split

# Shuffled 70/30 split; random_state pins the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    shuffle=True,
    random_state=0,
)
In [19]:
pip install lightgbm
Requirement already satisfied: lightgbm in c:\users\kareeshan\anaconda3\lib\site-packages (4.0.0)
Requirement already satisfied: scipy in c:\users\kareeshan\anaconda3\lib\site-packages (from lightgbm) (1.7.3)
Requirement already satisfied: numpy in c:\users\kareeshan\anaconda3\lib\site-packages (from lightgbm) (1.21.5)
Note: you may need to restart the kernel to use updated packages.
In [20]:
import lightgbm as lgb

# Fit a LightGBM classifier with library-default hyperparameters on the
# training split. fit() returns the estimator itself, so `model` is the
# fitted classifier and is echoed as the cell output.
model = lgb.LGBMClassifier().fit(X_train, y_train)
model
[LightGBM] [Warning] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000094 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 655
[LightGBM] [Info] Number of data points in the train set: 1482, number of used features: 6
[LightGBM] [Info] Start training from score -1.850109
[LightGBM] [Info] Start training from score -1.368903
[LightGBM] [Info] Start training from score -1.659241
[LightGBM] [Info] Start training from score -1.732803
[LightGBM] [Info] Start training from score -1.508134
Out[20]:
LGBMClassifier()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LGBMClassifier()
In [21]:
# Predict a breed label for every row of the held-out test features.
y_pred=model.predict(X_test)
In [22]:
# Test-set accuracy of the fitted model.
from sklearn.metrics import accuracy_score

# scikit-learn's signature is accuracy_score(y_true, y_pred); pass the
# arguments in that order and reuse the result rather than computing it twice.
accuracy = accuracy_score(y_test, y_pred)
print('LightGBM Model accuracy score: {0:0.4f}'.format(accuracy))
LightGBM Model accuracy score: 0.8208
In [23]:
# Accuracy on the data the model was fitted on, for comparison with the
# test-set accuracy.
y_pred_train = model.predict(X_train)
train_accuracy = accuracy_score(y_train, y_pred_train)
print('Training-set accuracy score: {0:0.4f}'.format(train_accuracy))
Training-set accuracy score: 1.0000
In [24]:
# Score the fitted model on both splits, then report them together.
train_score = model.score(X_train, y_train)
test_score = model.score(X_test, y_test)

print('Training set score: {:.4f}'.format(train_score))
print('Test set score: {:.4f}'.format(test_score))
Training set score: 1.0000
Test set score: 0.8208
In [25]:
# Confusion matrix of the test-set predictions (rows: actual, columns: predicted).
from sklearn.metrics import confusion_matrix

# Fix the class order explicitly and reuse it for the tick labels, so the
# heatmap axes show breed names rather than bare positions 0..n-1.
class_names = model.classes_
cm = confusion_matrix(y_test, y_pred, labels=class_names)

plt.figure(figsize=(15,15))
sns.heatmap(cm, annot=True, fmt=".0f", linewidths=.5, square = True, cmap = 'Blues',
            xticklabels=class_names, yticklabels=class_names);
plt.ylabel('Actual label');
plt.xlabel('Predicted label');
all_sample_title = 'Confusion Matrix - score:'+str(accuracy_score(y_test,y_pred))
plt.title(all_sample_title, size = 15);
plt.show()
In [26]:
from sklearn.metrics import classification_report

# Per-breed precision, recall and F1 on the test split, as a text table.
report = classification_report(y_test, y_pred)
print(report)
                precision    recall  f1-score   support

        AT 356       0.93      0.74      0.82       107
        BG 250       0.78      0.83      0.80       151
        BG 352       0.82      0.89      0.86       114
Mottaikaruppan       0.86      0.88      0.87       128
      suwandel       0.76      0.76      0.76       136

      accuracy                           0.82       636
     macro avg       0.83      0.82      0.82       636
  weighted avg       0.83      0.82      0.82       636

In [27]:
# Predict the breed for one hand-entered sample. Name every feature and align
# the frame to the training columns, so the value-to-feature mapping is
# explicit and cannot silently drift out of the order used at fit time.
sample = pd.DataFrame(
    [{'N': 90, 'P': 42, 'K': 43, 'Temperature': 20.879744, 'Humidity': 75, 'PH': 5.5}],
    columns=X.columns,
)
newdata = model.predict(sample)
newdata
Out[27]:
array(['BG 352'], dtype=object)
In [28]:
pip install joblib
Requirement already satisfied: joblib in c:\users\kareeshan\anaconda3\lib\site-packages (1.3.1)
Note: you may need to restart the kernel to use updated packages.
In [31]:
from lightgbm import LGBMClassifier
from joblib import dump

# Train a classifier with default hyperparameters on the training split and
# write it to disk. dump() returns the list of file names it wrote, which is
# echoed as the cell output.
lgbm_classification = LGBMClassifier()
lgbm_classification.fit(X_train, y_train)

dump(lgbm_classification, 'train_model2.joblib')
[LightGBM] [Warning] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000284 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 655
[LightGBM] [Info] Number of data points in the train set: 1482, number of used features: 6
[LightGBM] [Info] Start training from score -1.850109
[LightGBM] [Info] Start training from score -1.368903
[LightGBM] [Info] Start training from score -1.659241
[LightGBM] [Info] Start training from score -1.732803
[LightGBM] [Info] Start training from score -1.508134
Out[31]:
['train_model2.joblib']
In [ ]: